From 8ae71c0646f17428e172d239be6c2ba1041c6934 Mon Sep 17 00:00:00 2001 From: Tim Deegan Date: Thu, 8 Mar 2007 16:40:35 +0000 Subject: [PATCH] [HVM] Save/restore: tidy PFN handling in the tools. Make the differences clearer between the number of frames, the max PFN and the size of the pfn array, and handle the framebuffer more cleanly. Signed-off-by: Tim Deegan --- tools/libxc/xc_hvm_restore.c | 57 +++++------ tools/libxc/xc_hvm_save.c | 120 ++++++++++++++++-------- tools/libxc/xenguest.h | 1 + tools/python/xen/xend/XendCheckpoint.py | 7 +- 4 files changed, 116 insertions(+), 69 deletions(-) diff --git a/tools/libxc/xc_hvm_restore.c b/tools/libxc/xc_hvm_restore.c index a5e51e84e8..0dafa82874 100644 --- a/tools/libxc/xc_hvm_restore.c +++ b/tools/libxc/xc_hvm_restore.c @@ -101,6 +101,9 @@ int xc_hvm_restore(int xc_handle, int io_fd, /* Number of pages of memory the guest has. *Not* the same as max_pfn. */ unsigned long nr_pages; + /* The size of an array big enough to contain all guest pfns */ + unsigned long pfn_array_size = max_pfn + 1; + /* hvm guest mem size (Mb) */ memsize = (unsigned long long)*store_mfn; v_end = memsize << 20; @@ -127,7 +130,7 @@ int xc_hvm_restore(int xc_handle, int io_fd, } - pfns = malloc(max_pfn * sizeof(xen_pfn_t)); + pfns = malloc(pfn_array_size * sizeof(xen_pfn_t)); if (pfns == NULL) { ERROR("memory alloc failed"); errno = ENOMEM; @@ -139,11 +142,11 @@ int xc_hvm_restore(int xc_handle, int io_fd, goto out; } - for ( i = 0; i < max_pfn; i++ ) + for ( i = 0; i < pfn_array_size; i++ ) pfns[i] = i; - for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < max_pfn; i++ ) + for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < pfn_array_size; i++ ) pfns[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; - arch_max_pfn = pfns[max_pfn - 1];/* used later */ + arch_max_pfn = pfns[max_pfn];/* used later */ /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. 
*/ rc = xc_domain_memory_populate_physmap( @@ -297,29 +300,6 @@ int xc_hvm_restore(int xc_handle, int io_fd, *store_mfn = (v_end >> PAGE_SHIFT) - 2; DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n", *store_mfn, v_end); - /* restore hvm context including pic/pit/shpage */ - if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { - ERROR("error read hvm context size!\n"); - goto out; - } - - hvm_buf = malloc(rec_len); - if (hvm_buf == NULL) { - ERROR("memory alloc for hvm context buffer failed"); - errno = ENOMEM; - goto out; - } - - if (!read_exact(io_fd, hvm_buf, rec_len)) { - ERROR("error read hvm buffer!\n"); - goto out; - } - - if (( rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len))) { - ERROR("error set hvm buffer!\n"); - goto out; - } - if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { ERROR("error read nr vcpu !\n"); goto out; @@ -347,6 +327,29 @@ int xc_hvm_restore(int xc_handle, int io_fd, } } + /* restore hvm context including pic/pit/shpage */ + if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { + ERROR("error read hvm context size!\n"); + goto out; + } + + hvm_buf = malloc(rec_len); + if (hvm_buf == NULL) { + ERROR("memory alloc for hvm context buffer failed"); + errno = ENOMEM; + goto out; + } + + if (!read_exact(io_fd, hvm_buf, rec_len)) { + ERROR("error read hvm buffer!\n"); + goto out; + } + + if (( rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len))) { + ERROR("error set hvm buffer!\n"); + goto out; + } + /* Shared-info pfn */ if (!read_exact(io_fd, &(shared_info_frame), sizeof(uint32_t)) ) { ERROR("reading the shared-info pfn failed!\n"); diff --git a/tools/libxc/xc_hvm_save.c b/tools/libxc/xc_hvm_save.c index c422e296e6..ac06db72a1 100644 --- a/tools/libxc/xc_hvm_save.c +++ b/tools/libxc/xc_hvm_save.c @@ -54,9 +54,6 @@ static unsigned long hvirt_start; /* #levels of page tables used by the current guest */ static unsigned int pt_levels; -/* total number of pages used by the current guest */ 
-static unsigned long max_pfn; - int xc_hvm_drain_io(int handle, domid_t dom) { DECLARE_HYPERCALL; @@ -80,7 +77,7 @@ int xc_hvm_drain_io(int handle, domid_t dom) */ #define BITS_PER_LONG (sizeof(unsigned long) * 8) -#define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8) +#define BITMAP_SIZE ((pfn_array_size + BITS_PER_LONG - 1) / 8) #define BITMAP_ENTRY(_nr,_bmap) \ ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] @@ -202,7 +199,7 @@ static int print_stats(int xc_handle, uint32_t domid, int pages_sent, return 0; } -static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn, +static int analysis_phase(int xc_handle, uint32_t domid, int pfn_array_size, unsigned long *arr, int runs) { long long start, now; @@ -215,7 +212,7 @@ static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn, int i; xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, - arr, max_pfn, NULL, 0, NULL); + arr, pfn_array_size, NULL, 0, NULL); DPRINTF("#Flush\n"); for ( i = 0; i < 40; i++ ) { usleep(50000); @@ -255,7 +252,7 @@ static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend) - return 0; // success + return 0; // success if (info->paused) { // try unpausing domain, wait, and retest @@ -287,11 +284,21 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, int rc = 1, i, j, last_iter, iter = 0; int live = (flags & XCFLAGS_LIVE); int debug = (flags & XCFLAGS_DEBUG); + int stdvga = (flags & XCFLAGS_STDVGA); int sent_last_iter, skip_this_iter; + /* The highest guest-physical frame number used by the current guest */ + unsigned long max_pfn; + + /* The size of an array big enough to contain all guest pfns */ + unsigned long pfn_array_size; + /* The new domain's shared-info frame number. 
*/ unsigned long shared_info_frame; + /* Other magic frames: ioreqs and xenstore comms */ + unsigned long ioreq_pfn, bufioreq_pfn, store_pfn; + /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; @@ -310,7 +317,7 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t rec_size, nr_vcpus; - /* power of 2 order of max_pfn */ + /* power of 2 order of pfn_array_size */ int order_nr; /* bitmap of pages: @@ -372,6 +379,15 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, goto out; } + if ( xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, &store_pfn) + || xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, &ioreq_pfn) + || xc_get_hvm_param(xc_handle, dom, + HVM_PARAM_BUFIOREQ_PFN, &bufioreq_pfn) ) + { + ERROR("HVM: Could not read magic PFN parameters"); + goto out; + } + DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, " "nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); @@ -387,6 +403,7 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, } last_iter = 0; + DPRINTF("hvm domain live migration debug start: logdirty enable.\n"); } else { /* This is a non-live suspend. Issue the call back to get the @@ -407,20 +424,28 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, /* Calculate the highest PFN of "normal" memory: * HVM memory is sequential except for the VGA and MMIO holes. 
*/ - max_pfn = info.nr_pages; + max_pfn = info.nr_pages - 1; + /* If the domain has a Cirrus framebuffer and we haven't already + * suspended qemu-dm, it will have 8MB of framebuffer memory + * still allocated, which we don't want to copy: qemu will save it + * for us later */ + if ( live && !stdvga ) + max_pfn -= 0x800; /* Skip the VGA hole from 0xa0000 to 0xc0000 */ - max_pfn += 0x20; + max_pfn += 0x20; /* Skip the MMIO hole: 256MB just below 4GB */ if ( max_pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) ) max_pfn += (HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT); - skip_this_iter = 0;/*XXX*/ + /* Size of any array that covers 0 ... max_pfn */ + pfn_array_size = max_pfn + 1; + /* pretend we sent all the pages last iteration */ - sent_last_iter = max_pfn; + sent_last_iter = pfn_array_size; - /* calculate the power of 2 order of max_pfn, e.g. + /* calculate the power of 2 order of pfn_array_size, e.g. 15->4 16->4 17->5 */ - for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++) + for (i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++) continue; /* Setup to_send / to_fix and to_skip bitmaps */ @@ -454,7 +479,7 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, return 1; } - analysis_phase(xc_handle, dom, max_pfn, to_skip, 0); + analysis_phase(xc_handle, dom, pfn_array_size, to_skip, 0); /* We want zeroed memory so use calloc rather than malloc. 
*/ @@ -484,9 +509,17 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, DPRINTF("Saving HVM domain memory pages: iter %d 0%%", iter); - while( N < max_pfn ){ + if (last_iter && (max_pfn != live_shinfo->arch.max_pfn)) { + DPRINTF("calculated max_pfn as %#lx, shinfo says %#lx\n", + max_pfn, live_shinfo->arch.max_pfn); + ERROR("Max pfn doesn't match shared info"); + goto out; + } + + while( N < pfn_array_size ){ - unsigned int this_pc = (N * 100) / max_pfn; + unsigned int this_pc = (N * 100) / pfn_array_size; + int rc; if ((this_pc - prev_pc) >= 5) { DPRINTF("\b\b\b\b%3d%%", this_pc); @@ -495,9 +528,9 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, /* slightly wasteful to peek the whole array evey time, but this is fast enough for the moment. */ - if (!last_iter && xc_shadow_control( - xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, - to_skip, max_pfn, NULL, 0, NULL) != max_pfn) { + if (!last_iter && (rc = xc_shadow_control( + xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, + pfn_array_size, NULL, 0, NULL)) != pfn_array_size) { ERROR("Error peeking HVM shadow bitmap"); goto out; } @@ -505,11 +538,11 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, /* load pfn_batch[] with the mfn of all the pages we're doing in this batch. 
*/ - for (batch = 0; batch < MAX_BATCH_SIZE && N < max_pfn ; N++) { + for (batch = 0; batch < MAX_BATCH_SIZE && N < pfn_array_size; N++){ - int n = permute(N, max_pfn, order_nr); + int n = permute(N, pfn_array_size, order_nr); - if (debug) { + if (0&&debug) { DPRINTF("%d pfn= %08lx %d \n", iter, (unsigned long)n, test_bit(n, to_send)); } @@ -524,7 +557,10 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, /* Skip PFNs that aren't really there */ if ((n >= 0xa0 && n < 0xc0) /* VGA hole */ || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) - && n < (1ULL << 32) >> PAGE_SHIFT)) /* 4G MMIO hole */ + && n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ + || n == store_pfn + || n == ioreq_pfn + || n == bufioreq_pfn) continue; /* @@ -589,7 +625,7 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, print_stats( xc_handle, dom, sent_this_iter, &stats, 1); DPRINTF("Total pages sent= %ld (%.2fx)\n", - total_sent, ((float)total_sent)/max_pfn ); + total_sent, ((float)total_sent)/pfn_array_size ); } if (last_iter && debug){ @@ -616,7 +652,7 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || (iter >= max_iters) || (sent_this_iter+skip_this_iter < 50) || - (total_sent > max_pfn*max_factor) ) { + (total_sent > pfn_array_size*max_factor) ) { DPRINTF("Start last iteration for HVM domain\n"); last_iter = 1; @@ -635,7 +671,8 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, if (xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, - max_pfn, NULL, 0, &stats) != max_pfn) { + pfn_array_size, NULL, + 0, &stats) != pfn_array_size) { ERROR("Error flushing shadow PT"); goto out; } @@ -659,20 +696,6 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, goto out; } - if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, - hvm_buf_size)) == -1) { - ERROR("HVM:Could not get hvm 
buffer"); - goto out; - } - - if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { - ERROR("error write hvm buffer size"); - goto out; - } - - if ( !write_exact(io_fd, hvm_buf, rec_size) ) { - ERROR("write HVM info failed!\n"); - } /* save vcpu/vmcs context */ if (!write_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { @@ -701,6 +724,21 @@ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, } } + if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, + hvm_buf_size)) == -1) { + ERROR("HVM:Could not get hvm buffer"); + goto out; + } + + if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { + ERROR("error write hvm buffer size"); + goto out; + } + + if ( !write_exact(io_fd, hvm_buf, rec_size) ) { + ERROR("write HVM info failed!\n"); + } + /* Shared-info pfn */ if (!write_exact(io_fd, &(shared_info_frame), sizeof(uint32_t)) ) { ERROR("write shared-info pfn failed!\n"); diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h index 0f77217978..5b6e7c6d2a 100644 --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -12,6 +12,7 @@ #define XCFLAGS_LIVE 1 #define XCFLAGS_DEBUG 2 #define XCFLAGS_HVM 4 +#define XCFLAGS_STDVGA 8 /** diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py index e94d9fbde7..92ba37d4fd 100644 --- a/tools/python/xen/xend/XendCheckpoint.py +++ b/tools/python/xen/xend/XendCheckpoint.py @@ -74,9 +74,13 @@ def save(fd, dominfo, network, live, dst, checkpoint=False): image_cfg = dominfo.info.get('image', {}) hvm = image_cfg.has_key('hvm') + stdvga = 0 if hvm: log.info("save hvm domain") + if image_cfg['hvm']['devices']['stdvga'] == 1: + stdvga = 1 + # xc_save takes three customization parameters: maxit, max_f, and # flags the last controls whether or not save is 'live', while the # first two further customize behaviour when 'live' save is @@ -84,7 +88,8 @@ def save(fd, dominfo, network, live, dst, checkpoint=False): # libxenguest; see the comments and/or code in 
xc_linux_save() for # more information. cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd), - str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) << 2)) ] + str(dominfo.getDomid()), "0", "0", + str(int(live) | (int(hvm) << 2) | (int(stdvga) << 3)) ] log.debug("[xc_save]: %s", string.join(cmd)) def saveInputHandler(line, tochild): -- 2.30.2